# PoP - Policing Socio-Geographic Boundaries and Inequality
# script for creating final dataset of police stops for Chicago.

# load packages
suppressPackageStartupMessages(
  
  {
    library(dplyr)
    library(tidyverse)
    library(ggplot2)
    library(haven)
    library(readxl)
    library(readr)
    library(areal)
    library(car)
    library(estimatr)
    library(magrittr)
    library(texreg)
    library(sandwich)
    library(jtools)
  }
)


# load geocoded stop data
load("chistops_geo_final.RData")

# make binary numeric vars for key cols
table(chistops_geo_final$RACE_CODE_CD)

# RACE_CODE_CD of arrestee
chistops_geo_final = chistops_geo_final %>%
  mutate(r_black = ifelse(RACE_CODE_CD == "BLK", 1, 0)) %>%
  mutate(r_white = ifelse(RACE_CODE_CD == "WHT", 1, 0)) %>%
  mutate(r_hisp = ifelse(RACE_CODE_CD == "WBH" | RACE_CODE_CD == "WWH", 1, 0)) %>%
  mutate(r_asian = ifelse(RACE_CODE_CD == "WHI" | RACE_CODE_CD == "API", 1, 0)) %>%
  mutate(r_other = ifelse(RACE_CODE_CD == "I" | RACE_CODE_CD == "P", 1, 0)) %>%
  mutate(r_nonwhite = ifelse(RACE_CODE_CD == "BLK" | RACE_CODE_CD == "WBH" | RACE_CODE_CD == "WWH" | RACE_CODE_CD == "API" | RACE_CODE_CD == "WHI" | 
                               RACE_CODE_CD == "I" | RACE_CODE_CD == "P", 1, 0))

# create var for type of stop
table(chistops_geo_final$VEHICLE_INVOLVED_I)

chistops_geo_final$VEHICLE_INVOLVED_I <- ifelse(chistops_geo_final$VEHICLE_INVOLVED_I == "Y", 1, 0)
table(chistops_geo_final$VEHICLE_INVOLVED_I)

chistops_geo_final$traffic <- ifelse(chistops_geo_final$VEHICLE_INVOLVED_I == "Y", 1, 0)

# load in final chi dataset
load("chi_final.RData")
# check crs for final df
st_crs(chi_fin)
# EPSG:4269 on NAD83


# so set initial coords to ESPG: 4269 
st_crs(chistops_geo_final)
st_crs(chistops_geo_final) <- 4269

# then transform to EPSG:4269 (on NAD83) to match chi_fin
chistops_geo_final$geometry = st_transform(chistops_geo_final$geometry, "EPSG:4269")


# set CRS between chi stops and chi_fin
st_crs(chistops_geo_final) = st_crs(chi_fin)
chistops_geo_final = st_transform(chistops_geo_final, st_crs(chi_fin))


save(chistops_geo_final, file = "chistops_geo_final.RData")


chi_stops = chistops_geo_final



# find intersection between stop data and chi blk groups 
chi_ints = st_intersects(chi_stops, chi_fin)

# now filter by race of arrestee
chi_ints2a = st_intersects(chi_stops %>% filter(r_black == 1), chi_fin)
chi_ints2b = st_intersects(chi_stops %>% filter(r_hisp == 1), chi_fin)
chi_ints2c = st_intersects(chi_stops %>% filter(r_white == 1), chi_fin)
chi_ints2d = st_intersects(chi_stops %>% filter(r_asian == 1), chi_fin)
chi_ints2e = st_intersects(chi_stops %>% filter(r_nonwhite == 1), chi_fin)

# sum total stops by blk
theout = chi_fin[chi_ints %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>% 
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(all_stops_total = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

# sum total stops by race
theout2a = chi_fin[chi_ints2a %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(all_stops_black = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout2b = chi_fin[chi_ints2b %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(all_stops_latino = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout2c = chi_fin[chi_ints2c %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(all_stops_white = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout2d = chi_fin[chi_ints2d %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(all_stops_asian = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout2e = chi_fin[chi_ints2e %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(all_stops_nonwhite = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

# remerge with chi_fin

chi_fin2 = merge(chi_fin, theout, by = "BLK_CODE", all.x = TRUE)
chi_fin2 = merge(chi_fin2, theout2a, by = "BLK_CODE", all.x = TRUE)
chi_fin2 = merge(chi_fin2, theout2b, by = "BLK_CODE", all.x = TRUE)
chi_fin2 = merge(chi_fin2, theout2c, by = "BLK_CODE", all.x = TRUE)
chi_fin2 = merge(chi_fin2, theout2d, by = "BLK_CODE", all.x = TRUE)
chi_fin2 = merge(chi_fin2, theout2e, by = "BLK_CODE", all.x = TRUE)


# now filter by just pedestrian stops
# find intersection between stop data and chitin blk groups 
chi_stops$traffic <- ifelse(chi_stops$VEHICLE_INVOLVED_I == 1, 1, 0)

chi_ints2 = st_intersects(chi_stops %>% filter(traffic == 0), chi_fin)



# now filter by race of arrestee
chi_ints3a = st_intersects(chi_stops %>% filter(traffic == 0 & r_black == 1), chi_fin)
chi_ints3b = st_intersects(chi_stops %>% filter(traffic == 0 & r_hisp == 1), chi_fin)
chi_ints3c = st_intersects(chi_stops %>% filter(traffic == 0 & r_white == 1), chi_fin)
chi_ints3d = st_intersects(chi_stops %>% filter(traffic == 0 & r_asian == 1), chi_fin)
chi_ints3e = st_intersects(chi_stops %>% filter(traffic == 0 & r_nonwhite == 1), chi_fin)

# sum total stops by blk
theout2 = chi_fin[chi_ints2 %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>% 
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(ped_stops_total = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

# sum total stops by race
theout3a = chi_fin[chi_ints3a %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(ped_stops_black = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout3b = chi_fin[chi_ints3b %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(ped_stops_latino = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout3c = chi_fin[chi_ints3c %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(ped_stops_white = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout3d = chi_fin[chi_ints3d %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(ped_stops_asian = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout3e = chi_fin[chi_ints3e %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(ped_stops_nonwhite = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

# remerge with chi_fin

chi_fin3 = merge(chi_fin2, theout2, by = "BLK_CODE", all.x = TRUE)
chi_fin3 = merge(chi_fin3, theout3a, by = "BLK_CODE", all.x = TRUE)
chi_fin3 = merge(chi_fin3, theout3b, by = "BLK_CODE", all.x = TRUE)
chi_fin3 = merge(chi_fin3, theout3c, by = "BLK_CODE", all.x = TRUE)
chi_fin3 = merge(chi_fin3, theout3d, by = "BLK_CODE", all.x = TRUE)
chi_fin3 = merge(chi_fin3, theout3e, by = "BLK_CODE", all.x = TRUE)


# convert NAs to 0s
# all stops
chi_fin3$all_stops_total <- ifelse(is.na(chi_fin3$all_stops_total), 0, chi_fin3$all_stops_total)
chi_fin3$all_stops_black <- ifelse(is.na(chi_fin3$all_stops_black), 0, chi_fin3$all_stops_black)
chi_fin3$all_stops_white <- ifelse(is.na(chi_fin3$all_stops_white), 0, chi_fin3$all_stops_white)
chi_fin3$all_stops_latino <- ifelse(is.na(chi_fin3$all_stops_latino), 0, chi_fin3$all_stops_latino)
chi_fin3$all_stops_asian <- ifelse(is.na(chi_fin3$all_stops_asian), 0, chi_fin3$all_stops_asian)
chi_fin3$all_stops_nonwhite <- ifelse(is.na(chi_fin3$all_stops_nonwhite), 0, chi_fin3$all_stops_nonwhite)


# ped stops
chi_fin3$ped_stops_total <- ifelse(is.na(chi_fin3$ped_stops_total), 0, chi_fin3$ped_stops_total)
chi_fin3$ped_stops_black <- ifelse(is.na(chi_fin3$ped_stops_black), 0, chi_fin3$ped_stops_black)
chi_fin3$ped_stops_white <- ifelse(is.na(chi_fin3$ped_stops_white), 0, chi_fin3$ped_stops_white)
chi_fin3$ped_stops_latino <- ifelse(is.na(chi_fin3$ped_stops_latino), 0, chi_fin3$ped_stops_latino)
chi_fin3$ped_stops_asian <- ifelse(is.na(chi_fin3$ped_stops_asian), 0, chi_fin3$ped_stops_asian)
chi_fin3$ped_stops_nonwhite <- ifelse(is.na(chi_fin3$ped_stops_nonwhite), 0, chi_fin3$ped_stops_nonwhite)

chi_stops_final = chi_fin3

# save
save(x = chi_stops_final, file = "chi_stops_final.RData")

# now create binned racial blv measures
chi_stops_final <- chi_stops_final %>% mutate(boundary_quart = quantile(chi_stops_final$p_race_white_blv, prob=.75, na.rm=TRUE),
                                              boundary_quart_dummy = ifelse(p_race_white_blv > boundary_quart,1,0 ))

# now run analyses

# log and +1 to variables (pop already logged)
chi_stops_final = chi_stops_final %>% 
  mutate(lmhhi = log(mhhi + 1),
         larrests = log(total_arrests + 1),
         lmisdemeanors = log(misdemeanor_arrests + 1),
         lfelonies = log(felony_arrests + 1),
         lnonviolent = log(nonviolent_arrests + 1),
         lviolent = log(violent_arrests + 1),
         lsociety = log(society_arrests + 1),
         lperson = log(person_arrests + 1),
         lproperty = log(property_arrests + 1),
         lcrime = log(crime_all + 1),
         lpropertycrime = log(property_crime + 1),
         lviolentcrime = log(violent_crime + 1))

# now for new stop vars
chi_stops_final = chi_stops_final %>%
  mutate(lall_stops_total = log(all_stops_total + 1),
         lall_stops_black = log(all_stops_black + 1),
         lall_stops_latino = log(all_stops_latino + 1),
         lall_stops_white = log(all_stops_white + 1),
         lall_stops_asian = log(all_stops_asian + 1),
         lall_stops_nonwhite = log(all_stops_nonwhite + 1),
         lped_stops_total = log(ped_stops_total + 1),
         lped_stops_black = log(ped_stops_black + 1),
         lped_stops_latino = log(ped_stops_latino + 1),
         lped_stops_white = log(ped_stops_white + 1),
         lped_stops_asian = log(ped_stops_asian + 1),
         lped_stops_nonwhite = log(ped_stops_nonwhite + 1))

# create scaled DVs (to account for different pop. sizes and density in blocks)
chi_stops_final$larrest_sd <- chi_stops_final$larrests - (mean(chi_stops_final$larrests)/sd(chi_stops_final$larrests))
chi_stops_final$lmisdemeanors_sd <- chi_stops_final$lmisdemeanors - (mean(chi_stops_final$lmisdemeanors)/sd(chi_stops_final$lmisdemeanors))
chi_stops_final$lfelonies_sd <- chi_stops_final$lfelonies - (mean(chi_stops_final$lfelonies)/sd(chi_stops_final$lfelonies))
chi_stops_final$lnonviolent_sd <- chi_stops_final$lnonviolent - (mean(chi_stops_final$lnonviolent)/sd(chi_stops_final$lnonviolent))
chi_stops_final$lviolent_sd <- chi_stops_final$lviolent - (mean(chi_stops_final$lviolent)/sd(chi_stops_final$lviolent))
chi_stops_final$lsociety_sd <- chi_stops_final$lsociety - (mean(chi_stops_final$lsociety)/sd(chi_stops_final$lsociety))
chi_stops_final$lperson_sd <- chi_stops_final$lperson - (mean(chi_stops_final$lperson)/sd(chi_stops_final$lperson))
chi_stops_final$lproperty_sd <- chi_stops_final$lproperty - (mean(chi_stops_final$lproperty)/sd(chi_stops_final$lproperty))

# create scaled DVs for stop vars
chi_stops_final$lall_stops_total_sd <- chi_stops_final$lall_stops_total - (mean(chi_stops_final$lall_stops_total)/sd(chi_stops_final$lall_stops_total))
chi_stops_final$lall_stops_black_sd <- chi_stops_final$lall_stops_black - (mean(chi_stops_final$lall_stops_black)/sd(chi_stops_final$lall_stops_black))
chi_stops_final$lall_stops_latino_sd <- chi_stops_final$lall_stops_latino - (mean(chi_stops_final$lall_stops_latino)/sd(chi_stops_final$lall_stops_latino))
chi_stops_final$lall_stops_white_sd <- chi_stops_final$lall_stops_white - (mean(chi_stops_final$lall_stops_white)/sd(chi_stops_final$lall_stops_white))
chi_stops_final$lall_stops_asian_sd <- chi_stops_final$lall_stops_asian - (mean(chi_stops_final$lall_stops_asian)/sd(chi_stops_final$lall_stops_asian))
chi_stops_final$lall_stops_nonwhite_sd <- chi_stops_final$lall_stops_nonwhite - (mean(chi_stops_final$lall_stops_nonwhite)/sd(chi_stops_final$lall_stops_nonwhite))

chi_stops_final$lped_stops_total_sd <- chi_stops_final$lped_stops_total - (mean(chi_stops_final$lped_stops_total)/sd(chi_stops_final$lped_stops_total))
chi_stops_final$lped_stops_black_sd <- chi_stops_final$lped_stops_black - (mean(chi_stops_final$lped_stops_black)/sd(chi_stops_final$lped_stops_black))
chi_stops_final$lped_stops_latino_sd <- chi_stops_final$lped_stops_latino - (mean(chi_stops_final$lped_stops_latino)/sd(chi_stops_final$lped_stops_latino))
chi_stops_final$lped_stops_white_sd <- chi_stops_final$lped_stops_white - (mean(chi_stops_final$lped_stops_white)/sd(chi_stops_final$lped_stops_white))
chi_stops_final$lped_stops_asian_sd <- chi_stops_final$lped_stops_asian - (mean(chi_stops_final$lped_stops_asian)/sd(chi_stops_final$lped_stops_asian))
chi_stops_final$lped_stops_nonwhite_sd <- chi_stops_final$lped_stops_nonwhite - (mean(chi_stops_final$lped_stops_nonwhite)/sd(chi_stops_final$lped_stops_nonwhite))


save(x = chi_stops_final, file = "chi_stops_final.RData")

